Happy or Sad Image Classifier


In [1]:
#Step 1: Use Pretrained VGG16 network and extract the features.

from keras.applications import VGG16,imagenet_utils
from keras.preprocessing.image import load_img,img_to_array
import numpy as np
from keras.models import Model
preprocess = imagenet_utils.preprocess_input
model = VGG16(weights="imagenet")
new_model = Model(inputs=model.input,outputs=model.layers[21].output)

def convert_img_to_vector(img_path):
    image = load_img(img_path,target_size=(224,224))
    image = img_to_array(image)
    image = np.expand_dims(image,axis=0)
    image = preprocess(image)
    return image

def get_image_feature(img_path):
    feats = np.transpose(new_model.predict(convert_img_to_vector(img_path)))
    return feats


Using TensorFlow backend.

In [2]:
#2. Import the custom dataset and compute the top 4096 features.
import glob
train_happy = glob.glob('./data/train/happy_face/*')
train_sad = glob.glob('./data/train/sad_face/*')

total_train = len(train_happy) + len(train_sad)

val_happy = glob.glob('./data/validation/happy_face/*')
val_sad = glob.glob('./data/validation/sad_face/*')

total_val = len(val_happy) + len(val_sad)

feats_train_happy = np.array([[get_image_feature(filename)] for filename in train_happy])
feats_train_sad = np.array([[get_image_feature(filename)] for filename in train_sad])
feats_train = np.hstack([feats_train_happy,feats_train_sad]).reshape(-1,4096)
del feats_train_happy,feats_train_sad  #we dont require

feats_val_happy = np.array([[get_image_feature(filename)] for filename in val_happy])
feats_val_sad = np.array([[get_image_feature(filename)] for filename in val_sad])
feats_val = np.hstack([feats_val_happy,feats_val_sad]).reshape(-1,4096)
del feats_val_happy,feats_val_sad #we dont require
print(feats_train.shape,feats_val.shape)


(348, 4096) (16, 4096)

In [3]:
#3. Define out model.
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(256, activation="relu", input_shape=(4096,), kernel_initializer="normal"))
model.add(Dense(64, activation="relu", kernel_initializer="normal"))
model.add(Dense(16, activation="relu", kernel_initializer="normal"))
model.add(Dense(1, activation="sigmoid", kernel_initializer="normal"))
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [4]:
#4. Defining our Output label.
train_labels = np.array([[0]*174 + [1] * 174]).reshape(-1,1)  #174 happy image, sad image for training respectively
validation_labels = np.array([[0] * 8 + [1] * 8]).reshape(-1,1) #8 validation happy image and sad image.
print(train_labels.shape,validation_labels.shape)


(348, 1) (16, 1)

In [5]:
#5.train our model
import keras
earlyStopping=keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=0, mode='auto')
model.fit(feats_train,
          train_labels,
          epochs=15,
          batch_size=16,
          validation_data=(feats_val,validation_labels),
          verbose=1,
          callbacks=[earlyStopping])
print('Training Completed!')


Train on 348 samples, validate on 16 samples
Epoch 1/15
348/348 [==============================] - 0s - loss: 0.6941 - acc: 0.4569 - val_loss: 0.6898 - val_acc: 0.5000
Epoch 2/15
348/348 [==============================] - 0s - loss: 0.6530 - acc: 0.6236 - val_loss: 0.7476 - val_acc: 0.4375
Epoch 3/15
348/348 [==============================] - 0s - loss: 0.5392 - acc: 0.7299 - val_loss: 0.7526 - val_acc: 0.5000
Epoch 4/15
348/348 [==============================] - 0s - loss: 0.3998 - acc: 0.8190 - val_loss: 1.0365 - val_acc: 0.5625
Training Completed!

In [6]:
#6. finding out the optimal threshold.
from sklearn.metrics import roc_curve
import pandas as pd
def Find_Optimal_Cutoff(target, predicted):
    """ Find the optimal probability cutoff point for a classification model related to event rate
    Parameters
    ----------
    target : Matrix with dependent or target data, where rows are observations

    predicted : Matrix with predicted data, where rows are observations

    Returns
    -------     
    list type, with optimal cutoff value

    """
    fpr, tpr, threshold = roc_curve(target, predicted)
    i = np.arange(len(tpr)) 
    roc = pd.DataFrame({'tf' : pd.Series(tpr-(1-fpr), index=i), 'threshold' : pd.Series(threshold, index=i)})
    roc_t = roc.iloc[(roc.tf-0).abs().argsort()[:1]]

    return list(roc_t['threshold']) 
predicted = np.array([model.predict_proba(feats.reshape(-1,4096),verbose=0).reshape(1)[0] for feats in feats_val]).reshape(-1,1)
threshold = Find_Optimal_Cutoff(validation_labels,predicted)[0]

In [7]:
#7. Writing our main predict function

def predict_mood(img_path):
    feats = get_image_feature(img_path)
    feats = feats.reshape(-1,4096)
    predicted_probab = model.predict_proba(feats,verbose=0)
    if predicted_probab > threshold:
        return 'Sad'
    else:
        return 'Happy'

In [8]:
from IPython.display import Image
Image(filename='./data/validation/happy_face/happy_woman.jpg')


Out[8]:

In [9]:
predict_mood('./data/validation/happy_face/happy_woman.jpg')


Out[9]:
'Happy'

In [10]:
from IPython.display import Image
Image(filename='./data/validation/sad_face/crying_baby.jpg')


Out[10]:

In [11]:
predict_mood('./data/validation/sad_face/crying_baby.jpg')


Out[11]:
'Sad'